Yushi Tang
March 5/7, 2019
lfs setstripe -c 18 -S 4m /path/to/filesFor this exercise, we will be analyzing a single cell RNA-Seq dataset of Peripheral Blood Mononuclear Cells (PBMC) from the 10X Genomics platform. We will primarily be using the seurat package from the Satija Lab (http://satijalab.org/seurat/pbmc-tutorial.html). The purpose of single cell RNA-Seq analysis is to uncover interesting biology that occurs at a granularity–the single cell–that isn’t appreciated when these features become averaged in bulk. The goal of this analysis is to uncover heterogenity in PBMCs and understanding the analysis workflows for single cell technologies.
First, install/load the packages and the data object
# Install required packages
install.packages('Seurat')
install.packages('dplyr')library(Seurat)
library(Matrix)
library(dplyr)
# Load the PBMC dataset
pbmc <- readRDS("./Data/pbmc.rds")
# View the data structure
dim(pbmc@raw.data)## [1] 32643 2001
# Number of genes
nrow(pbmc@raw.data)## [1] 32643
# Print target gene id
rownames(pbmc@raw.data)[1:10]## [1] "MIR1302-10" "FAM138A" "OR4F5" "RP11-34P13.7"
## [5] "RP11-34P13.8" "AL627309.1" "RP11-34P13.14" "RP11-34P13.9"
## [9] "AP006222.2" "RP4-669L17.10"
# Number of samples
ncol(pbmc@raw.data)## [1] 2001
Note: to achieve this object, the counts matrix had to be determined using a standard alignment protocol similar to bulk RNA-Seq analyses. The .rds object contains a seurat object with 2001 samples and 32,643 genes. This sample set includes roughly 1,000 PBMC samples from two different batches.
# Examine the memory savings between regular and sparse matrix
object.size(as.matrix(pbmc@raw.data))## 524899792 bytes
object.size(pbmc@raw.data)## 21504672 bytes
pbmc <- CreateSeuratObject(raw.data=pbmc@raw.data,min.cells = 3, min.genes = 200,project = "10X_PBMC")
dim(pbmc@data)## [1] 12858 2001
dim(pbmc@raw.data)## [1] 12858 2001
# Examine the memory savings again
object.size(as.matrix(pbmc@raw.data))## 206813880 bytes
object.size(pbmc@raw.data)## 20091616 bytes
When trying to discover rare cell types, one has to be weary of technical confounders that imply heterogenity that are actually false. Two measures of technical confounders are the number of mitochondrial reads mapped as well as the number of unique genes mapped. In this dataset, how many mitochondrial genes are there? How many samples express a number of genes that significantly deviates from the rest? Remove these.
# The number of genes (nGene) and UMIs (nUMI, unique molecular identifiers)
# are automatically calculated. For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell.
# Calculate the percentage of mitochondrial genes as percent.mito
# Use raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.genes <- grep("^MT-", rownames(pbmc@data), value = TRUE)
length(mito.genes)
percent.mito <- colSums(pbmc@data[mito.genes, ])/colSums(pbmc@data)
# Adds columns to object@data.info, and is a great place to stash QC stats
pbmc <- AddMetaData(pbmc, percent.mito, "percent.mito")
VlnPlot(pbmc, c("percent.mito"), nCol = 1)VlnPlot(pbmc, c("nGene", "nUMI", "percent.mito"), nCol = 3)pbmc <- SubsetData(pbmc, subset.name = "nGene", accept.high = 2500)
pbmc <- SubsetData(pbmc, subset.name = "percent.mito", accept.high = 0.05)
# Use GenePlot to visualize gene-gene relationship
GenePlot(object = pbmc, gene1 = "nUMI", gene2 = "percent.mito")GenePlot(object = pbmc, gene1 = "nUMI", gene2 = "nGene")# Cells that have unique gene counts over 2,500 or less than 200
# might be filtered out
pbmc <- FilterCells(object = pbmc, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))pbmc <- NormalizeData(object = pbmc, normalization.method = "LogNormalize", scale.factor = 1e4)pbmc <- FindVariableGenes(pbmc,mean.function=ExpMean,dispersion.function = LogVMR, x.low.cutoff = 0.0125, x.high.cutoff = 3, y.cutoff = 0.5, do.contour = FALSE)length(x = pbmc@var.genes)## [1] 1793
# Print the variable genes' ids
# These genes would be used as input for linear dimensioinal reduction below (PCA)
pbmc@var.genes## [1] "ISG15" "TNFRSF4" "CPSF3L"
## [4] "ATAD3C" "MIB2" "C1orf86"
## [7] "RER1" "TPRG1L" "LRRC47"
## [10] "CAMTA1" "TNFRSF9" "TNFRSF1B"
## [13] "SDHB" "CDA" "PINK1"
## [16] "PINK1-AS" "RP3-329E20.2" "C1QA"
## [19] "C1QC" "C1QB" "ZNF436"
## [22] "TCEA3" "PITHD1" "GALE"
## [25] "NIPAL3" "CLIC4" "C1orf63"
## [28] "STMN1" "PDIK1L" "TMEM222"
## [31] "RPA2" "PTAFR" "PHACTR4"
## [34] "YTHDF2" "SPOCD1" "PTP4A2"
## [37] "TMEM234" "HDAC1" "MARCKSL1"
## [40] "BSDC1" "SMIM12" "C1orf216"
## [43] "AGO4" "TRAPPC3" "MYCBP"
## [46] "PPIE" "NFYC" "CITED4"
## [49] "SCMH1" "C1orf50" "ZNF691"
## [52] "KDM4A-AS1" "DMAP1" "ERI3"
## [55] "PRDX1" "TMEM69" "IPP"
## [58] "FAAH" "PDZK1IP1" "TAL1"
## [61] "RNF11" "PRPF38A" "FAM159A"
## [64] "ECHDC2" "YIPF1" "HSPB11"
## [67] "DOCK7" "EFCAB7" "IL23R"
## [70] "ACADM" "ST6GALNAC3" "RP5-887A10.1"
## [73] "PRKACB" "RPF1" "CTBS"
## [76] "C1orf52" "RP4-612B15.3" "SH3GLB1"
## [79] "GBP2" "ZNF644" "DPH5"
## [82] "HENMT1" "RP5-1065J22.8" "TMEM167B"
## [85] "SARS" "STRIP1" "CEPT1"
## [88] "CHI3L2" "LRIG2" "RP5-1073O3.7"
## [91] "FCGR1B" "NOTCH2NL" "LIX1L"
## [94] "POLR3C" "CD160" "NBPF15"
## [97] "RP11-666A1.5" "RP11-277L2.3" "FCGR1A"
## [100] "HIST2H2BE" "HIST2H2AC" "BOLA1"
## [103] "C1orf51" "CTSS" "FAM63A"
## [106] "PRUNE" "MLLT11" "VPS72"
## [109] "RORC" "S100A12" "S100A8"
## [112] "S100A6" "SLC39A1" "HAX1"
## [115] "PMVK" "EFNA3" "LMNA"
## [118] "BGLAP" "APOA1BP" "ISG20L2"
## [121] "SH2D2A" "FCRL2" "FCRL1"
## [124] "CD1C" "AIM2" "FCER1A"
## [127] "FCRL6" "PEA15" "DCAF8"
## [130] "PEX19" "CD48" "F11R"
## [133] "PFDN2" "DEDD" "NDUFS2"
## [136] "FCER1G" "FCGR2A" "HSPA6"
## [139] "FCGR3A" "FCGR3B" "FCRLA"
## [142] "ATF6" "UAP1" "HSD17B7"
## [145] "TADA1" "CD247" "MPC2"
## [148] "TIPRL" "XCL1" "SELL"
## [151] "RALGPS2" "SOAT1" "RP11-533E19.5"
## [154] "QSOX1" "NPL" "DHX9"
## [157] "C1orf21" "IVNS1ABP" "RGS18"
## [160] "RGS1" "RP11-553K8.2" "TIMM17A"
## [163] "CYB5R1" "IL24" "G0S2"
## [166] "BATF3" "NSL1" "BPNT1"
## [169] "FAM177B" "TP53BP2" "CNIH4"
## [172] "SRP9" "LIN9" "PSEN2"
## [175] "OBSCN" "RNF187" "URB2"
## [178] "C1orf198" "GNPAT" "SPRTN"
## [181] "COA6" "ARID4B" "GPR137B"
## [184] "FH" "CHML" "SH3BP5L"
## [187] "PGBD2" "TMEM18" "TSSC1"
## [190] "ODC1" "LPIN1" "NBAS"
## [193] "RDH14" "LAPTM4A" "ITSN2"
## [196] "DNAJC27" "DNMT3A" "ASXL2"
## [199] "KIF3C" "KHK" "PREB"
## [202] "ATRAID" "SNX17" "NRBP1"
## [205] "RBKS" "TRMT61B" "FAM179A"
## [208] "NLRC4" "FAM98A" "NDUFAF7"
## [211] "GALM" "AC083949.1" "PPM1B"
## [214] "FBXO11" "EML6" "MTIF2"
## [217] "BCL11A" "C2orf74" "USP34"
## [220] "AFTPH" "CEP68" "PNO1"
## [223] "GFPT1" "NFU1" "FAM136A"
## [226] "MCEE" "PAIP2B" "ZNF638"
## [229] "CCT7" "STAMBP" "BOLA3"
## [232] "SEMA4F" "MRPL19" "TRABD2A"
## [235] "TGOLN2" "MAT2A" "VAMP8"
## [238] "VAMP5" "GNLY" "CD8A"
## [241] "CD8B" "ZNF2" "CIAO1"
## [244] "ZAP70" "IL18RAP" "FHL2"
## [247] "LIMS1" "CCDC138" "IL1B"
## [250] "RABL2A" "NIFK" "MAP3K2"
## [253] "CCDC115" "ARHGEF4" "PLEKHB2"
## [256] "RAB3GAP1" "UBXN4" "CXCR4"
## [259] "ARL5A" "PRPF40A" "ACVR1"
## [262] "BAZ2B" "CD302" "PSMD14"
## [265] "SLC4A10" "FASTKD1" "METTL8"
## [268] "GPR155" "AC079305.10" "PLEKHA3"
## [271] "TTN-AS1" "DUSP19" "SLC40A1"
## [274] "ANKAR" "C2orf88" "SDPR"
## [277] "ANKRD44" "SF3B1" "MOB4"
## [280] "TYW5" "NOP58" "BMPR2"
## [283] "WDR12" "INO80D" "AC079767.4"
## [286] "CCNYL1" "PECR" "TNS1"
## [289] "USP37" "RNF25" "CYP27A1"
## [292] "TUBA4A" "GMPPA" "CHPF"
## [295] "SERPINE2" "ATG16L1" "RAMP1"
## [298] "UBE2F" "DUSP28" "CAPN10-AS1"
## [301] "GPR35" "MTERFD2" "ING5"
## [304] "PDCD1" "THUMPD3" "SETD5"
## [307] "ARPC4" "RPUSD3" "ATG7"
## [310] "TMEM40" "CAND2" "AC093495.4"
## [313] "SLC6A6" "SH3BP5" "HACL1"
## [316] "UBE2E2" "TGFBR2" "CMTM7"
## [319] "CRTAP" "DLEC1" "ACAA1"
## [322] "MYD88" "ENTPD3-AS1" "CTNNB1"
## [325] "SEC22C" "ZBTB47" "ZNF501"
## [328] "LIMD1" "CCDC12" "NME6"
## [331] "ARIH2OS" "IMPDH2" "QRICH1"
## [334] "USP19" "GPX1" "IP6K1"
## [337] "FAM212A" "RASSF1" "NPRL2"
## [340] "HEMK1" "CISH" "TWF2"
## [343] "GLYCTK" "SMIM4" "CCDC66"
## [346] "DENND6A" "DNASE1L3" "THOC7"
## [349] "RP11-245J9.4" "SUCLG2" "EOGT"
## [352] "TMF1" "ARL6IP5" "LMOD3"
## [355] "GBE1" "C3orf38" "LNP1"
## [358] "BBX" "CD200" "ATP6V1A"
## [361] "TIGIT" "B4GALT4" "FSTL1"
## [364] "HGD" "GOLGB1" "EAF2"
## [367] "PTPLB" "MYLK" "GATA2"
## [370] "RAB7A" "GP9" "ISY1"
## [373] "NEK11" "UBA5" "RAB6B"
## [376] "ZBTB38" "EIF2A" "RP11-103G8.2"
## [379] "GPR171" "MBNL1-AS1" "DHX36"
## [382] "RP11-305K5.1" "MFSD1" "SMC4"
## [385] "SPTSSB" "RPL22L1" "RP11-185E8.1"
## [388] "LAMP3" "YEATS2" "PARL"
## [391] "AP2M1" "ECE2" "PSMD2"
## [394] "SENP2" "CCDC50" "FAM43A"
## [397] "RNF168" "FYTTD1" "PDE6B"
## [400] "SPON2" "SLBP" "LETM1"
## [403] "WHSC1" "MFSD10" "ZBTB49"
## [406] "JAKMIP1" "AC093323.3" "RP11-539L10.2"
## [409] "S100P" "GRPEL1" "FGFBP2"
## [412] "LAP3" "MED28" "PACRGL"
## [415] "SEL1L3" "DTHD1" "SMIM14"
## [418] "UBE2K" "GNPDA2" "TEC"
## [421] "DCUN1D4" "SCFD2" "CHIC2"
## [424] "TMEM165" "PAICS" "HOPX"
## [427] "STAP1" "YTHDC1" "IGJ"
## [430] "IL8" "PF4" "PPBP"
## [433] "CXCL3" "CXCL2" "RCHY1"
## [436] "CXCL10" "SEPT11" "HNRNPDL"
## [439] "LIN54" "COPS4" "HELQ"
## [442] "MRPS18C" "SNCA" "SMARCAD1"
## [445] "METAP1" "DAPP1" "MANBA"
## [448] "PPA2" "INTS12" "GSTCD"
## [451] "OSTC" "PLA2G12A" "GAR1"
## [454] "C4orf32" "SNHG8" "METTL14"
## [457] "C4orf3" "SPRY1" "PGRMC2"
## [460] "MGST2" "ZNF330" "LSM6"
## [463] "NR3C2" "LRBA" "RP11-18H21.1"
## [466] "NEK1" "RWDD4" "KIAA1430"
## [469] "LRP2BP" "PDCD6" "C5orf55"
## [472] "CTD-2228K2.5" "MRPL36" "C5orf42"
## [475] "FYB" "DAB2" "NNT-AS1"
## [478] "NNT" "EMB" "NDUFS4"
## [481] "GZMK" "GZMA" "CTD-2031P19.3"
## [484] "MIER3" "ZSWIM6" "PIK3R1"
## [487] "NSA2" "F2R" "WDR41"
## [490] "DHFR" "ZCCHC9" "TMEM161B-AS1"
## [493] "POLR3G" "ARRDC3" "RHOBTB3"
## [496] "LNPEP" "RIOK2" "CHD1"
## [499] "FAM174A" "FBXL17" "CCDC112"
## [502] "LMNB1" "SLC12A2" "RAD50"
## [505] "KIF3A" "C5orf15" "CTB-113I20.2"
## [508] "PPP2CA" "CDKL3" "CAMLG"
## [511] "DDX46" "EGR1" "MZB1"
## [514] "DNAJC18" "HBEGF" "SRA1"
## [517] "APBB3" "CD14" "WDR55"
## [520] "HARS" "DIAPH1" "NDFIP1"
## [523] "PRELID2" "CSF1R" "IRGM"
## [526] "GPX3" "CCDC69" "SPARC"
## [529] "FAXDC2" "HAVCR2" "EBF1"
## [532] "RP11-175K6.1" "UBLCP1" "SPDL1"
## [535] "ATP6V0E1" "UIMC1" "NSD1"
## [538] "DOK3" "RUFY1" "SQSTM1"
## [541] "ZFP62" "TRIM7" "DUSP22"
## [544] "EXOC2" "RP11-532F6.3" "GMDS"
## [547] "LINC01011" "ECI2" "CDYL"
## [550] "LYRM4" "F13A1" "TMEM14C"
## [553] "TMEM14B" "TBC1D7" "MCUR1"
## [556] "JARID2" "SOX4" "RP11-367G6.3"
## [559] "TRIM38" "HIST1H2AC" "HIST1H1E"
## [562] "HIST1H2BF" "BTN3A1" "ABT1"
## [565] "HIST1H2BJ" "HIST1H2AG" "HIST1H2BN"
## [568] "HIST1H1B" "RP1-313I6.12" "ZKSCAN3"
## [571] "MRPS18B" "XXbac-BPG252P9.9" "FLOT1"
## [574] "IER3" "NFKBIL1" "LTB"
## [577] "LST1" "NCR3" "AIF1"
## [580] "CSNK2B" "LY6G5C" "LY6G6F"
## [583] "CLIC1" "LSM2" "HSPA1L"
## [586] "EHMT2" "DXO" "AGPAT1"
## [589] "GPSM3" "HLA-DRB1" "HLA-DQA1"
## [592] "HLA-DQB1" "HLA-DOB" "HLA-DMB"
## [595] "HLA-DMA" "HLA-DPA1" "HLA-DPB1"
## [598] "HSD17B8" "B3GALT4" "CUTA"
## [601] "DEF6" "PPARD" "FKBP5"
## [604] "SRPK1" "STK38" "SRSF3"
## [607] "RNF8" "TREML1" "TBCC"
## [610] "GLTSCR1L" "PTCRA" "KLC4"
## [613] "DNPH1" "ABCC10" "YIPF3"
## [616] "GTPBP2" "PLA2G7" "MCM3"
## [619] "GSTA4" "ELOVL5" "DST"
## [622] "ZNF451" "PRIM2" "PHF3"
## [625] "SLC17A5" "SH3BGRL2" "SNX14"
## [628] "SLC35A1" "LYRM2" "UFL1"
## [631] "NDUFAF4" "PRDM1" "AK9"
## [634] "CDC40" "AMD1" "TRAF3IP2"
## [637] "FAM229B" "HDAC2" "ZUFSP"
## [640] "ASF1A" "ECHDC1" "THEMIS"
## [643] "ARHGAP18" "LINC01013" "TBPL1"
## [646] "RP3-325F22.5" "SF3B5" "RAB32"
## [649] "UST" "PPIL4" "GINM1"
## [652] "SNX9" "DYNLT1" "EZR"
## [655] "RP11-13P5.1" "SFT2D1" "AC147651.3"
## [658] "LFNG" "BRAT1" "IQCE"
## [661] "RBAK-RBAKDN" "CYTH3" "DAGLB"
## [664] "C7orf26" "PHF14" "TSPAN13"
## [667] "IL6" "TRA2A" "HNRNPA2B1"
## [670] "FKBP14" "ELMO1" "GPR141"
## [673] "RP11-121A8.1" "STK17A" "MRPS24"
## [676] "DDX56" "LANCL2" "VOPP1"
## [679] "CCT6A" "GUSB" "TPST1"
## [682] "RP11-792A8.4" "SBDS" "AUTS2"
## [685] "WBSCR27" "LAT2" "RFC2"
## [688] "CLIP2" "HIP1" "GSAP"
## [691] "PTPN12" "RSBN1L" "CD36"
## [694] "DMTF1" "ABCB4" "RBM48"
## [697] "SAMD9L" "GNG11" "ZNF394"
## [700] "ZNF3" "MBLAC1" "PILRB"
## [703] "TSC22D4" "LRCH4" "MOSPD3"
## [706] "CUX1" "LRWD1" "DNAJC2"
## [709] "SYPL1" "HBP1" "BCAP29"
## [712] "PNPLA8" "IFRD1" "ATP6V1F"
## [715] "NRF1" "RP11-775D22.2" "MEST"
## [718] "AC016831.7" "MKLN1" "CHCHD3"
## [721] "AKR1B1" "WDR91" "SSBP1"
## [724] "ZNF777" "ZBED6CL" "REPIN1"
## [727] "GIMAP5" "TMEM176B" "CHPF2"
## [730] "PAXIP1-AS1" "WDR60" "GTPBP6"
## [733] "GPM6B" "RBBP7" "PDHA1"
## [736] "SH3KBP1" "SMS" "PRDX4"
## [739] "SAT1" "POLA1" "GPR34"
## [742] "FUNDC1" "KRBOX4" "CDK16"
## [745] "PORCN" "PQBP1" "WDR45"
## [748] "GPKOW" "SYP" "CCDC22"
## [751] "PJA1" "TAF1" "MAGEE1"
## [754] "P2RY10" "ITM2A" "SYTL4"
## [757] "ARMCX5" "TCEAL8" "BEX2"
## [760] "NGFRAP1" "TCEAL4" "TCEAL3"
## [763] "PSMD10" "NXT2" "WDR44"
## [766] "PGRMC1" "C1GALT1C1" "OCRL"
## [769] "ZDHHC9" "FAM122B" "FAM122C"
## [772] "ZNF449" "DDX26B" "ATP11C"
## [775] "IDH3G" "PDZD4" "TKTL1"
## [778] "FLNA" "SLC10A3" "MPP1"
## [781] "FBXO25" "CTD-2336O2.1" "BLK"
## [784] "LONRF1" "DMTN" "FAM160B2"
## [787] "SLC39A14" "R3HCC1" "CLU"
## [790] "FZD3" "DUSP4" "RP11-489E7.4"
## [793] "FUT10" "PPAPDC1B" "FGFR1"
## [796] "TACC1" "CTD-2544N14.3" "IDO1"
## [799] "GOLGA7" "SMIM19" "FNTA"
## [802] "POMK" "CEBPD" "PCMTD1"
## [805] "AC090186.1" "TMEM68" "RP11-25K19.1"
## [808] "C8orf44" "RP11-383H13.1" "PKIA"
## [811] "ZBTB10" "RP11-1149M10.2" "CA13"
## [814] "CA2" "DECR1" "PDP1"
## [817] "NIPAL2" "RP11-410L14.2" "POLR2K"
## [820] "SPAG1" "PKHD1L1" "MED30"
## [823] "AC023590.1" "DERL1" "TMEM65"
## [826] "SQLE" "MYC" "CASC7"
## [829] "PTK2" "TSNARE1" "LYPD2"
## [832] "TOP1MT" "NAPRT1" "MAF1"
## [835] "VPS28" "COMMD5" "RLN2"
## [838] "KLHL9" "CDKN2A" "PLAA"
## [841] "DNAJA1" "UBE2R2" "ENHO"
## [844] "STOML2" "CCDC107" "CREB3"
## [847] "FAM201A" "FXN" "TLE4"
## [850] "SPIN1" "NFIL3" "FAM120A"
## [853] "ERCC6L2" "CDC14B" "C9orf156"
## [856] "HEMGN" "ERP44" "INVS"
## [859] "SMC2" "NIPSNAP3B" "FSD1L"
## [862] "CTNNAL1" "INIP" "ZNF883"
## [865] "CDK5RAP2" "GSN" "SCAI"
## [868] "RALGPS1" "ZNF79" "PTGES2"
## [871] "C9orf16" "RP11-339B21.13" "COQ4"
## [874] "CCBL1" "TOR1A" "RP11-544A12.8"
## [877] "POMT1" "GTF3C5" "GBGT1"
## [880] "SURF1" "SARDH" "RXRA"
## [883] "FCN1" "C9orf69" "RABL6"
## [886] "PTGDS" "C9orf142" "CLIC3"
## [889] "NPDC1" "MRPL41" "C9orf37"
## [892] "EHMT1" "PITRM1" "KLF6"
## [895] "AKR1C3" "ANKRD16" "ATP5C1"
## [898] "SEC61A2" "NUDT5" "CAMK1D"
## [901] "PTER" "RP11-499P20.2" "NSUN6"
## [904] "MLLT10" "COMMD3" "ARHGAP21"
## [907] "PTCHD3P1" "MAP3K8" "KIF5B"
## [910] "RP11-324I22.4" "ZNF37A" "ZNF32"
## [913] "ZFAND4" "NCOA4" "LINC00843"
## [916] "CISD1" "UBE2D1" "ARID5B"
## [919] "RTKN2" "REEP3" "SIRT1"
## [922] "PBLD" "HNRNPH3" "AIFM2"
## [925] "PRF1" "PSAP" "ANAPC16"
## [928] "ZSWIM8" "RP11-119F19.2" "NUTM2A-AS1"
## [931] "ATAD1" "IFIT2" "IFIT1"
## [934] "MARCH5" "SLC35G1" "HELLS"
## [937] "ENTPD1" "BLNK" "ARHGAP19"
## [940] "MRPL43" "FBXW4" "KCNIP2"
## [943] "TMEM180" "GSTO1" "XPNPEP1"
## [946] "SHOC2" "VTI1A" "FAM204A"
## [949] "FAM45A" "PRDX3" "TIAL1"
## [952] "PLEKHA1" "ZRANB1" "DHX32"
## [955] "GLRX3" "STK32C" "ADAM8"
## [958] "CALY" "SYCE1" "SCGB1C1"
## [961] "IFITM3" "PTDSS2" "RP11-496I9.1"
## [964] "PHRF1" "TMEM80" "PIDD"
## [967] "CD151" "RP11-1391J7.1" "MOB2"
## [970] "TNNI2" "KCNQ1OT1" "NAP1L4"
## [973] "CARS" "ZNF195" "STIM1"
## [976] "RRM1" "APBB1" "TMEM9B"
## [979] "DENND5A" "IPO7" "SWAP70"
## [982] "PDE3B" "SAAL1" "C11orf74"
## [985] "API5" "EXT2" "CD82"
## [988] "PEX16" "DGKZ" "CKAP5"
## [991] "C11orf49" "SPI1" "MTCH2"
## [994] "PTPRJ" "TMX2" "MS4A1"
## [997] "SDHAF2" "FADS1" "TAF6L"
## [1000] "LGALS12" "PLA2G16" "FERMT3"
## [1003] "NUDT22" "BAD" "PRDX5"
## [1006] "AP001462.6" "VPS51" "SYVN1"
## [1009] "AP003068.23" "AP000769.1" "SCYL1"
## [1012] "CTSW" "KLC2" "RAB1B"
## [1015] "ZDHHC24" "RBM4B" "RCE1"
## [1018] "TBC1D10C" "AP003419.16" "PTPRCAP"
## [1021] "CORO1B" "GSTP1" "NDUFS8"
## [1024] "MRPL21" "ORAOV1" "NADSYN1"
## [1027] "LAMTOR1" "PAAF1" "PGM2L1"
## [1030] "RP11-111M22.2" "AP001189.4" "DKFZP434E1119"
## [1033] "CLNS1A" "NDUFC2" "RAB30-AS1"
## [1036] "CCDC90B" "TMEM126B" "PRSS23"
## [1039] "RAB38" "CWC15" "ENDOD1"
## [1042] "MTMR2" "BIRC3" "RDX"
## [1045] "C11orf57" "REXO2" "FXYD2"
## [1048] "CD3D" "KMT2A" "HMBS"
## [1051] "OAF" "NRGN" "ESAM"
## [1054] "ROBO3" "FEZ1" "RPUSD4"
## [1057] "SRPR" "ETS1" "JAM3"
## [1060] "VPS26B" "THYN1" "WNT5B"
## [1063] "FKBP4" "TULP3" "TSPAN9"
## [1066] "CCND2" "NDUFA9" "CD9"
## [1069] "CD27" "IFFO1" "ACRBP"
## [1072] "PTMS" "LAG3" "SPSB2"
## [1075] "ATN1" "C12orf57" "EMG1"
## [1078] "U47924.30" "C1RL-AS1" "CLSTN3"
## [1081] "CD163" "CLEC4E" "KLRG1"
## [1084] "KLRF1" "CLEC2B" "CLEC1B"
## [1087] "KLRC2" "KLRC1" "RP11-291B21.2"
## [1090] "GOLT1B" "C12orf39" "LYRM5"
## [1093] "PPFIBP1" "DENND5B" "METTL20"
## [1096] "RP11-428G5.5" "ALG10" "SLC2A13"
## [1099] "PRICKLE1" "IRAK4" "NELL2"
## [1102] "PCED1B" "SLC48A1" "RHEBL1"
## [1105] "BCDIN3D" "SMARCD1" "KRT1"
## [1108] "IGFBP6" "NFE2" "NCKAP1L"
## [1111] "CDK2" "RAB5B" "SMARCC2"
## [1114] "IL23A" "BAZ2A" "PTGES3"
## [1117] "LRP1" "DTX3" "METTL21B"
## [1120] "CTDSP2" "USP15" "CAND1"
## [1123] "IFNG" "RAB21" "TBC1D15"
## [1126] "NAP1L1" "PAWR" "METTL25"
## [1129] "TMTC2" "LINC00936" "RP11-693J15.5"
## [1132] "EEA1" "NDUFA12" "C12orf45"
## [1135] "BTBD11" "PWP1" "CORO1C"
## [1138] "SSH1" "USP30" "GLTP"
## [1141] "C12orf76" "VPS29" "NAA25"
## [1144] "RFC5" "DYNLL1" "MLEC"
## [1147] "OASL" "RHOF" "BCL7A"
## [1150] "HIP1R" "MPHOSPH9" "SETD8"
## [1153] "SNRNP35" "NCOR2" "LINC00944"
## [1156] "ULK1" "EP400NL" "ANKLE2"
## [1159] "LATS2" "SPATA13" "MTIF3"
## [1162] "FLT1" "HSPH1" "RGCC"
## [1165] "DNAJC15" "CCDC122" "TSC22D1"
## [1168] "GPALPP1" "GTF2F2" "RCBTB2"
## [1171] "FNDC3A" "PCDH9" "DIS3"
## [1174] "CLN5" "MYCBP2" "RBM26-AS1"
## [1177] "RAP2A" "UBAC2" "GPR183"
## [1180] "GGACT" "ARGLU1" "TNFSF13B"
## [1183] "MCF2L-AS1" "UPF3A" "CCNB1IP1"
## [1186] "RNASE6" "RNASE2" "METTL3"
## [1189] "HOMEZ" "CMTM5" "DHRS4"
## [1192] "MDP1" "GMPR2" "CIDEB"
## [1195] "LTB4R" "KHNYN" "SDR39U1"
## [1198] "GZMH" "GZMB" "SCFD1"
## [1201] "AP4S1" "HECTD1" "SRP54"
## [1204] "PPP2R3C" "CTAGE5" "L2HGDH"
## [1207] "PYGL" "PTGDR" "GCH1"
## [1210] "MAPK1IP1L" "LGALS3" "TMEM260"
## [1213] "RP11-349A22.5" "ARID4A" "JKAMP"
## [1216] "FNTB" "MAX" "ATP6V1D"
## [1219] "ARG2" "VTI1B" "RAD51B"
## [1222] "DCAF5" "PCNX" "DPF3"
## [1225] "ZNF410" "ENTPD5" "DLST"
## [1228] "MLH3" "C14orf1" "IFT43"
## [1231] "IRF2BPL" "GSTZ1" "AHSA1"
## [1234] "SEL1L" "SPATA7" "ITPK1"
## [1237] "IFI27" "DICER1" "DICER1-AS1"
## [1240] "TCL1B" "TCL1A" "RP11-164H13.1"
## [1243] "WARS" "RP11-796G6.2" "RP11-1017G21.5"
## [1246] "DYNC1H1" "ZNF839" "ANKRD9"
## [1249] "TRAF3" "CDC42BPB" "TNFAIP2"
## [1252] "EIF5" "SIVA1" "CRIP2"
## [1255] "AL928768.3" "KIAA0125" "NIPA2"
## [1258] "ATP10A" "KLF13" "ARHGAP11A"
## [1261] "EMC7" "ZNF770" "DPH6"
## [1264] "EIF2AK4" "RMDN3" "LRRC57"
## [1267] "TMEM62" "PDIA3" "CASC4"
## [1270] "PATL2" "DUOX1" "SPATA5L1"
## [1273] "SLC30A4" "BLOC1S6" "LYSMD2"
## [1276] "TMOD3" "RP11-430B1.2" "NEDD4"
## [1279] "LINC00926" "LIPC" "ADAM10"
## [1282] "FAM63B" "MYO1E" "NARG2"
## [1285] "RP11-219B17.1" "RORA" "TPM1"
## [1288] "RAB8B" "SNX22" "PPIB"
## [1291] "RP11-502I4.3" "RP11-1007O24.3" "EDC3"
## [1294] "COMMD4" "RP11-817O13.8" "SIN3A"
## [1297] "IMP3" "SCAPER" "RCN2"
## [1300] "IL16" "TM6SF1" "HAPLN3"
## [1303] "FANCI" "IDH2" "UNC45A"
## [1306] "HDDC3" "RCCD1" "FAM174B"
## [1309] "ARRDC4" "LYSMD4" "VIMP"
## [1312] "POLR3K" "HBA1" "LUC7L"
## [1315] "MRPL28" "NME4" "C16orf13"
## [1318] "FAM173A" "RPUSD1" "LMF1"
## [1321] "HAGH" "NDUFB10" "GFER"
## [1324] "PGP" "PDPK1" "FLYWCH1"
## [1327] "ZNF263" "NAA60" "CLUAP1"
## [1330] "CDIP1" "METTL22" "CARHSP1"
## [1333] "DEXI" "TNFRSF17" "RP11-680G24.5"
## [1336] "FOPNL" "KNOP1" "TMEM159"
## [1339] "IGSF6" "POLR3E" "TNRC6A"
## [1342] "IL4R" "GTF3C1" "EIF3C"
## [1345] "RABEP2" "CD19" "SPNS1"
## [1348] "C16orf54" "TMEM219" "TAOK2"
## [1351] "SLX1A" "ZNF771" "ZNF747"
## [1354] "PHKG2" "ORAI3" "STX4"
## [1357] "VKORC1" "PYCARD" "TGFB1I1"
## [1360] "SLC5A2" "ORC6" "DNAJA2"
## [1363] "LONP2" "SNX20" "AKTIP"
## [1366] "GNAO1" "RP11-413H22.2" "MT1E"
## [1369] "MT1F" "GPR56" "KIFC3"
## [1372] "C16orf80" "SETD6" "LINC00920"
## [1375] "CMTM3" "CES4A" "LRRC29"
## [1378] "ACD" "SLC7A6" "CIRH1A"
## [1381] "NFAT5" "DDX19A" "ST3GAL2"
## [1384] "COG4" "ATXN1L" "PSMD7"
## [1387] "RP11-252A24.7" "KARS" "TERF2IP"
## [1390] "COTL1" "IRF8" "RP11-542M13.3"
## [1393] "MVD" "AC137932.6" "SPG7"
## [1396] "CHMP1A" "ZNF276" "DEF8"
## [1399] "GAS8" "RPH3AL" "TIMM22"
## [1402] "CRK" "PITPNA-AS1" "PRPF8"
## [1405] "P2RX5" "ARRB2" "PSMB6"
## [1408] "GP1BA" "AC109333.10" "ZNF232"
## [1411] "CTC-524C5.2" "DERL2" "MED31"
## [1414] "ALOX12" "CLEC10A" "ASGR2"
## [1417] "ASGR1" "KCTD11" "SENP3"
## [1420] "SOX15" "CYB5D1" "C17orf59"
## [1423] "RP11-214O1.3" "RP11-138I1.2" "ZNF287"
## [1426] "TNFRSF13B" "NT5M" "SREBF1"
## [1429] "TRIM16L" "GRAP" "AKAP10"
## [1432] "USP22" "MAP2K3" "KIAA0100"
## [1435] "PROCA1" "RAB34" "NEK8"
## [1438] "GOSR1" "ADAP2" "RNF135"
## [1441] "UTP6" "CCL2" "SLFN12"
## [1444] "RP11-1094M14.11" "CCL5" "CCL3"
## [1447] "CCL4" "AC131056.3" "ZNHIT3"
## [1450] "GGNBP2" "MRM1" "AATF"
## [1453] "SYNRG" "MLLT6" "CISD3"
## [1456] "PIP4K2B" "CACNB1" "PGAP3"
## [1459] "RP11-94L15.2" "GSDMB" "CTD-2267D19.2"
## [1462] "TOP2A" "RP5-1028K7.2" "CTD-3193K9.4"
## [1465] "VPS25" "RUNDC1" "IFI35"
## [1468] "DHX8" "DUSP3" "RP11-527L4.5"
## [1471] "ITGA2B" "HEXIM2" "FMNL1"
## [1474] "RP11-798G7.6" "NSF" "ITGB3"
## [1477] "OSBPL7" "CALCOCO2" "ABI3"
## [1480] "PDK2" "ABCC3" "TOB1"
## [1483] "MMD" "NOG" "SEPT4"
## [1486] "YPEL2" "BCAS3" "MED13"
## [1489] "PSMC5" "CD79B" "RP11-401F2.3"
## [1492] "NOL11" "KIF19" "CTD-2006K23.1"
## [1495] "RAB37" "NAT9" "ATP5H"
## [1498] "ARMC7" "NT5C" "MRPS7"
## [1501] "GRB2" "SMIM5" "PRPSAP1"
## [1504] "SPHK1" "ST6GALNAC2" "JMJD6"
## [1507] "SRSF2" "SEC14L1" "USP36"
## [1510] "LGALS3BP" "RNF213" "RPTOR"
## [1513] "SLC38A10" "MRPL12" "RFNG"
## [1516] "GPS1" "C17orf62" "FN3KRP"
## [1519] "USP14" "TYMS" "YES1"
## [1522] "MYOM1" "RP13-270P17.3" "VAPA"
## [1525] "RP11-820I16.4" "RP11-973H7.1" "FAM210A"
## [1528] "OSBPL1A" "ZNF397" "ZSCAN30"
## [1531] "INO80C" "C18orf21" "ATP5A1"
## [1534] "RAB27B" "FECH" "MALT1"
## [1537] "RP11-879F14.2" "CCDC102B" "CD226"
## [1540] "SOCS6" "SOX12" "RBCK1"
## [1543] "SDCBP2" "ITPA" "SMOX"
## [1546] "TRMT6" "CRLS1" "MKKS"
## [1549] "NDUFAF5" "MACROD2" "SNRPB2"
## [1552] "NAA20" "CD93" "CST3"
## [1555] "CST7" "APMAP" "PDRG1"
## [1558] "SNTA1" "ACTL10" "RALY"
## [1561] "GSS" "UQCC1" "RBM39"
## [1564] "AAR2" "MYL9" "DSN1"
## [1567] "SAMHD1" "MROH8" "RPN2"
## [1570] "BLCAP" "CTNNBL1" "MAFB"
## [1573] "SRSF6" "PKIG" "STK4"
## [1576] "UBE2C" "TP53RK" "NCOA3"
## [1579] "BCAS4" "ADNP" "KCNG1"
## [1582] "ATP9A" "ZBP1" "PMEPA1"
## [1585] "CTSZ" "TUBB1" "CABLES2"
## [1588] "TPD52L2" "DNAJC5" "LINC00176"
## [1591] "TCEA2" "RGS19" "PCMTD2"
## [1594] "TPGS1" "FSTL3" "PRSS57"
## [1597] "CNN2" "ABHD17A" "PLEKHJ1"
## [1600] "GADD45B" "AES" "MFSD12"
## [1603] "TBXA2R" "CACTIN" "AC016586.1"
## [1606] "SIRT6" "CCDC94" "ALKBH7"
## [1609] "CRB3" "PCP2" "RETN"
## [1612] "CERS4" "CD320" "RAB11B"
## [1615] "MARCH2" "ICAM1" "CDC37"
## [1618] "S1PR5" "C19orf52" "TMEM205"
## [1621] "ELOF1" "ZNF439" "FBXW9"
## [1624] "HOOK2" "CACNA1A" "SAMD1"
## [1627] "PRKACA" "AKAP8" "TPM4"
## [1630] "FAM32A" "CHERP" "MVB12A"
## [1633] "FAM129C" "COLGALT1" "IFI30"
## [1636] "JUND" "RFXANK" "ZNF430"
## [1639] "ZNF738" "ZNF43" "ZNF675"
## [1642] "PLEKHF1" "ZNF792" "ARHGAP33"
## [1645] "TYROBP" "POLR2I" "LINC00665"
## [1648] "ZNF529" "AC092295.7" "ZNF382"
## [1651] "AC074138.3" "ZNF567" "ZNF383"
## [1654] "SPINT2" "PPP1R14A" "C19orf33"
## [1657] "KCNK6" "CATSPERG" "RASGRP4"
## [1660] "ACTN4" "LRFN1" "TIMM50"
## [1663] "FCGBP" "PLD3" "BLVRB"
## [1666] "TMEM91" "ATP5SL" "CD79A"
## [1669] "GSK3A" "PHLDB3" "TOMM40"
## [1672] "RELB" "PPM1N" "BBC3"
## [1675] "PRR24" "C5AR1" "DHX34"
## [1678] "NAPA-AS1" "GLTSCR1" "SEPW1"
## [1681] "PLA2G4C" "LIG1" "EMP3"
## [1684] "CYTH2" "CTC-273B12.10" "TRPM4"
## [1687] "IL4I1" "ZNF473" "EMC10"
## [1690] "NKG7" "ZNF175" "FPR1"
## [1693] "ZNF836" "ZNF766" "ZNF528"
## [1696] "ZNF611" "ZNF845" "OSCAR"
## [1699] "MBOAT7" "LILRA4" "LAIR2"
## [1702] "KIR3DL1" "KIR2DL3" "TNNT1"
## [1705] "ISOC2" "EPN1" "ZNF773"
## [1708] "ZNF671" "ZNF606" "AC010642.1"
## [1711] "TTTY15" "EIF1AY" "TPTEP1"
## [1714] "CECR5" "PEX26" "MRPL40"
## [1717] "CLDN5" "SEPT5" "C22orf29"
## [1720] "THAP7" "SDF2L1" "IGLL5"
## [1723] "VPREB3" "DDT" "GSTT1"
## [1726] "TPST2" "TTC28" "XBP1"
## [1729] "EWSR1" "NF2" "OSM"
## [1732] "SMTN" "PISD" "FBXO7"
## [1735] "LARGE" "MCM5" "PVALB"
## [1738] "IL2RB" "RAC2" "LGALS2"
## [1741] "SH3BP1" "LGALS1" "DDX17"
## [1744] "TOMM22" "JOSD1" "DNAL4"
## [1747] "APOBEC3A" "APOBEC3B" "APOBEC3H"
## [1750] "ATF4" "CACNA1I" "GRAP2"
## [1753] "ADSL" "NHP2L1" "CTA-250D10.23"
## [1756] "TNFRSF13C" "FAM109B" "SMDT1"
## [1759] "ARFGAP3" "PACSIN2" "PARVB"
## [1762] "PRR5" "CITF22-92A6.1" "TTC38"
## [1765] "CTA-29F11.1" "TBC1D22A" "LL22NC03-75H12.2"
## [1768] "ZBED4" "ALG12" "CRELD2"
## [1771] "TYMP" "RABL2B" "NRIP1"
## [1774] "C21orf91" "MAP3K7CL" "MIS18A"
## [1777] "C21orf59" "IL10RB" "ATP5O"
## [1780] "CBR1" "DSCR3" "WRB"
## [1783] "MX1" "C2CD2" "AGPAT3"
## [1786] "AP001055.6" "ICOSLG" "LRRC3"
## [1789] "SUMO3" "FAM207A" "SLC19A1"
## [1792] "S100B" "MT-ND6"
Rather than focusing on specific differentially expressed genes, a staple in scRNA-Seq analyses involves dimension reduction. We have already analyzed the mean-variance relationship and discarded outliers. Now we can scale the data and regress out the confounders we looked at earlier. Finally, we’ll run PCA based on the information from variable genes deteced before.
# For this sample, we regress on the number of detected molecules per cell
# and the percentage mitochondrial gene content
# This step would take several minutes
pbmc <- ScaleData(pbmc,vars.to.regress = c('nUMI','percent.mito'))##
## Time Elapsed: 10.6055631637573 secs
pbmc <- RunPCA(pbmc, pc.genes = pbmc@var.genes, do.print = FALSE, pcs.print = 5, genes.print = 5)Seurat provides several useful ways of visualizing both cells and genes that define the PCA, including PrintPCA(), VizPCA(), PCAPlot(), and PCHeatmap()
# Examine and visualize PCA results a few different ways
# Print out genes and cells that define each PCs
PrintPCA(pbmc, pcs.print = 1:5, genes.print = 5, use.full = FALSE)## [1] "PC1"
## [1] "CST3" "FCN1" "AIF1" "TYROBP" "S100A8"
## [1] ""
## [1] "PTPRCAP" "LTB" "CD3D" "CXCR4" "AES"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "NKG7" "GZMB" "PRF1" "CST7" "GZMA"
## [1] ""
## [1] "CD79A" "MS4A1" "HLA-DQA1" "CD79B" "HLA-DQB1"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "PPBP" "PF4" "GNG11" "SDPR" "HIST1H2AC"
## [1] ""
## [1] "NKG7" "PRF1" "GZMB" "CST7" "FGFBP2"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "CD3D" "FYB" "RGCC" "CD27" "NDFIP1"
## [1] ""
## [1] "CD79A" "HLA-DQA1" "CD79B" "HLA-DPB1" "HLA-DQB1"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "CD14" "S100A8" "S100A12" "LGALS2" "GPX1"
## [1] ""
## [1] "FCGR3A" "BATF3" "ABI3" "IFITM3" "CSF1R"
## [1] ""
## [1] ""
VizPCA(pbmc, 1:2)PCAPlot(pbmc, 1, 2)# PCHeatmap can be useful
PCHeatmap(pbmc, pc.use = 1, cells.use = 500, do.balanced = TRUE,
label.columns = TRUE, use.full = FALSE)PCHeatmap(pbmc, pc.use = 1:12, cells.use = 500, do.balanced = TRUE,
label.columns = FALSE, use.full = FALSE)Which principal components are statistically significant? Comment on one or more approaches to determine this. - JackStrawPlot: comparing the distribution of p-values for each PC with a uniform distribution - PCElbowPlot: plot the standard deviations of the PCs
# This step would take several minutes
pbmc <- JackStraw(object = pbmc, num.replicate = 100, display.progress = FALSE)
JackStrawPlot(object = pbmc, PCs = 1:12)## An object of class seurat in project 10X_PBMC
## 12858 genes across 1959 samples.
PCElbowPlot(pbmc) A more ad hoc method for determining which PCs to use is to look at a plot of the standard deviations of the principle components and draw your cutoff where there is a clear elbow in the graph. This can be done with PCElbowPlot(). In this example, it looks like the elbow would fall around PC 9.
Use the FindClusters command to determine sample modules in the PBMC data. Comment on the type of clustering performed. Is it supervised or unsupervised?
pbmc <- FindClusters(object = pbmc, reduction.type = "pca", dims.use = 1:10,
resolution = 0.6, print.output = 0, save.SNN = TRUE)
# save.SNN = T saves the SNN so that the clustering algorithm can be rerun
# using the same graph but with a different resolution value Details are in the Seurat source code as well as several paragraphs in the vignette. It is supervised.
A popular method for displaying scRNA-Seq data is by creating two dimensions using tSNE (t-distributed stochastic neighbor embedding). Run and visualize tSNE for this data. Comment on how this approach is different than PCA.
set.seed(115) # However, the distance between two points would not be changed.
pbmc <- RunTSNE(pbmc, dims.use = 1:10, do.fast = TRUE)TSNEPlot(pbmc,do.label = TRUE) Note: tSNE is, by definition, a stochastic process so be sure to cache your data at this point or save the file image before re-running later steps! Main difference is linear/non linear effects; tSNE in this case is using the PCs as input
Now that we’ve defined data-driven clusters, we’d like to identify markers that define clusters via differential expression. What markers distinguish cluster 2? What markers distinguish cluster 2 from cluster 4? Every cluster from all others.
# find all markers of cluster 2
cluster2.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25)
print(head(cluster2.markers, 5))## p_val avg_logFC pct.1 pct.2 p_val_adj
## CD79A 0.000000e+00 2.990788 0.931 0.036 0.000000e+00
## MS4A1 3.455029e-249 2.330351 0.811 0.044 4.442476e-245
## CD79B 1.302849e-215 2.468962 0.919 0.130 1.675204e-211
## HLA-DQA1 3.247087e-199 2.120703 0.880 0.111 4.175104e-195
## TCL1A 2.113028e-188 2.573629 0.591 0.022 2.716931e-184
# find all markers distinguishing cluster 2 from clusters 4
cluster24.markers <- FindMarkers(pbmc, ident.1 = 2, ident.2 = 4, min.pct = 0.25)
print(head(cluster24.markers, 5))## p_val avg_logFC pct.1 pct.2 p_val_adj
## FCER1G 1.387216e-73 -3.275942 0.066 0.992 1.783682e-69
## AIF1 2.342478e-73 -3.374571 0.085 1.000 3.011959e-69
## TYROBP 3.704399e-73 -3.237095 0.081 1.000 4.763116e-69
## LST1 8.195670e-71 -3.312854 0.124 1.000 1.053799e-66
## FCGR3A 1.038593e-70 -2.875686 0.039 0.952 1.335423e-66
# find all markers distinguishing cluster 2 from clusters 1 and 4
cluster2_14.markers <- FindMarkers(pbmc, ident.1 = 2, ident.2 = c(1,4) , min.pct = 0.25)
print(head(cluster2_14.markers, 5))## p_val avg_logFC pct.1 pct.2 p_val_adj
## CD79A 5.052597e-136 3.031870 0.931 0.028 6.496629e-132
## TYROBP 3.334760e-115 -3.377390 0.081 0.998 4.287834e-111
## S100A4 1.353555e-113 -2.840829 0.340 1.000 1.740401e-109
## FTL 5.041023e-112 -2.226926 0.981 1.000 6.481748e-108
## CST3 8.265446e-112 -3.163959 0.147 0.994 1.062771e-107
# find markers for every cluster compared to all remaining cells, report
# only the positive ones
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
pbmc.markers %>% group_by(cluster) %>% top_n(2, avg_logFC) -> pbmc.markers.top2
pbmc.markers.top2## # A tibble: 16 x 7
## # Groups: cluster [8]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 1.19e-169 1.26 0.916 0.457 1.53e-165 0 LDHB
## 2 1.56e-107 1.06 0.665 0.176 2.01e-103 0 IL7R
## 3 1.32e-301 3.74 0.984 0.106 1.70e-297 1 S100A8
## 4 7.91e-255 3.65 0.987 0.201 1.02e-250 1 S100A9
## 5 0. 2.99 0.931 0.036 0. 2 CD79A
## 6 2.11e-188 2.57 0.591 0.022 2.72e-184 2 TCL1A
## 7 2.04e-179 2.35 0.971 0.192 2.63e-175 3 CCL5
## 8 9.74e-134 2.20 0.527 0.036 1.25e-129 3 GZMK
## 9 5.76e-138 2.25 0.952 0.124 7.40e-134 4 FCGR3A
## 10 5.27e- 94 2.06 1 0.313 6.78e- 90 4 LST1
## 11 1.33e-193 3.27 0.991 0.074 1.71e-189 5 GZMB
## 12 6.75e-154 3.69 1 0.119 8.68e-150 5 GNLY
## 13 9.03e-286 2.75 1 0.007 1.16e-281 6 FCER1A
## 14 1.94e- 28 2.11 0.964 0.202 2.49e- 24 6 HLA-DQA1
## 15 1.44e-176 4.56 1 0.01 1.85e-172 7 PF4
## 16 2.38e-110 5.46 1 0.02 3.06e-106 7 PPBP
FeaturePlot(object = pbmc, features.plot = pbmc.markers.top2$gene, cols.use = c("grey", "blue"), reduction.use = "tsne", nCol = 4)Using the biomarkers identified above, select a few markers to distinguish the various subgroups. Try plotting different measurements, including raw and normalized counts on/not on the log scale.
VlnPlot(pbmc, c("CD79A","LST1"))VlnPlot(pbmc, c("CD79A","LST1"), use.raw = TRUE, y.log = TRUE)We can also generate an expression heatmap for given cells and genes.
# Plot the top 10 markers for each cluster
top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(10, avg_logFC)
DoHeatmap(object = pbmc, genes.use = top10$gene, slim.col.label = TRUE, remove.key = TRUE)Using the table below, identify which clusters correspond to which cell subtypes in your tSNE projection. Do you observe any rare populations or mixed populations? Explore some other markers to characterize the behavior of these populations.
| Cluster ID | Markers | Cell Type |
|---|---|---|
| ? | IL7R | CD4 T cells |
| ? | CD14, LYZ | CD14+ Monocytes |
| ? | MS4A1 | B cells |
| ? | CD8A | CD8 T cells |
| ? | FCGR3A, MS4A7 | FCGR3A+ Monocytes |
| ? | GNLY, NKG7 | NK cells |
| ? | FCER1A, CST3 | Dendritic Cells |
| ? | PPBP | Megakaryocytes |
FeaturePlot(object = pbmc, features.plot = pbmc.markers.top2$gene, cols.use = c("grey", "blue"), reduction.use = "tsne", nCol = 4)Using the inference above, annotate your tSNE with the cell type names.
current.cluster.ids <- c(0, 1, 2, 3, 4, 5, 6, 7)
new.cluster.ids <- c("CD4 T cells", "CD14+ Monocytes", "B cells",
"CD8 T cells", "FCGR3A+ Monocytes", "NK cells",
"Dendritic cells", "Megakaryocytes")
pbmc@ident <- plyr::mapvalues(pbmc@ident, from = current.cluster.ids, to = new.cluster.ids)
TSNEPlot(pbmc, do.label = TRUE, pt.size = 0.5)Singel cell RNA-seq
For this exercise, we will be analyzing a single cell RNA-Seq dataset of human peripheral blood mononuclear cells (PBMC) from the 10X Genomics platform. There are 2,700 single cells that were sequenced on the Illumina NextSeq 500. The raw data can be found at: https://support.10xgenomics.com/single-cell/datasets/.
library(Seurat)
library(dplyr)
pbmc.data <- Read10X(data.dir = "./Data/filtered_gene_bc_matrices/hg19/")
# Print number of genes
# Print number of samples
# Calculate the dropout rate: count all 0 in pbmc.data, divided by ncol*crow#Hint: CreateSeuratObject
# Compare number of genes
# Compare number of samples
# Compare the dropout rate: count all 0 in pbmc.data, divided by ncol*crow
pbmc.data.temp <- CreateSeuratObject(raw.data=pbmc.data,min.cells = 3, min.genes = 200,project = "10X_PBMC")# All commands are on the QC slides
mito.genes <- grep(pattern = "^MT-", x = rownames(x = pbmc@data), value = TRUE)
mito.total <- sum(pbmc@raw.data[mito.genes, ])/sum(pbmc@raw.data)
mito.percent <- Matrix::colSums(pbmc@raw.data[mito.genes, ])/Matrix::colSums(pbmc@raw.data)
pbmc <- AddMetaData(object = pbmc, metadata = mito.percent , col.name = "percent.mito")
# Display the violin plot of nGene, nUMI, percent.mito after all these QC procedure
VlnPlot(object = pbmc, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)
# Use FilterCells command to filter cells with extremely high coverage# Normalize the data: NormalizeData
# Detect variable genes: FindVariableGenes
# Print number of variable genes
# Removing unwanted variances: ScaleData with nUMI, percent.mito
# Linear reduction: RunPCA
# Detect significant PCs: JackStraw, JackStrawPlot, PrintPCA, PCElbowPlot
# Calculate the explained variances
eigenvals <- (pbmc@dr$pca@sdev)^2
var.explained <- cumsum(eigenvals) / sum(eigenvals)
var.explained[11]pbmc.cellcycle <- CellCycleScoring(
object = pbmc,
g2m.genes = cc.genes$g2m.genes,
s.genes = cc.genes$s.genes,set.ident = TRUE)
VlnPlot(pbmc.cellcycle, c("nGene", "S.Score", "G2M.Score"), nCol = 3)
cc.genes <- c(cc.genes$g2m.genes,cc.genes$s.genes)
top.gene.num.list = c(10,50,100,200,500)
top.gene.11pca.percent = matrix(rep(0,length(top.gene.num.list)*11),ncol = 11)
for (j in 1:length(top.gene.num.list)){
top.gene.11pca = matrix(rep("a",top.gene.num.list[j]*11),ncol = 11)
for (i in 1:11){
top.gene.11pca[,i] = DimTopGenes(pbmc,dim.use = i,reduction.type = "pca", num.genes = top.gene.num.list[j],do.balanced = TRUE)
}
for (i in 1:11){
top.gene.11pca.percent[j,i] = length(intersect(top.gene.11pca[,i],cc.genes))
}
}
colnames(top.gene.11pca.percent) = paste0("PC",as.character(1:11),sep = "")
rownames(top.gene.11pca.percent) = paste0("top.gene",as.character(top.gene.num.list),sep = "")
knitr::kable(top.gene.11pca.percent)# Find cluster: FindClusters
# Run tSNE analysis: RunTSNE
# Visualization: TSNEPlot
# Visualization: PCAPlot# Hint: change the seed.use command in the RunTSNE function for several times
# Compare the tSNE plots# Hint: change the resolution command in the RunTSNE function for several times
# Compare the tSNE plotscell.assigment = t(as.matrix(table(pbmc@meta.data$res.0.6)))
colnames(cell.assigment) = names(table(pbmc@meta.data$res.0.6))
rownames(cell.assigment) = "Cell Number"
knitr::kable(cell.assigment, align = "c")
# Hint: Use FindAllMarkers function
# Visualization: FeaturePlot# Cluster genes: FindClusters
# Reannotate the cluster ids
# Display tSNE plot: TSNEPlot
# See slides related to cluster biomarkers for details.Please submit your solution directly on the canvas website. Please provide both your code in this Rmd document and an html file for your final write-up. Please pay attention to the clarity and cleanness of your homework.
The teaching fellows will grade your homework and give the grades with feedback through canvas within one week after the due date. Some of the questions might not have a unique or optimal solution. TFs will grade those according to your creativity and effort on exploration, especially in the graduate-level questions.